 /*
PROGRAM BY: JEFRREY WANG
Last edited 11/25/2019

This program merges CMF-LBD data with 2007 CMF materials trailers

Output is a CMF-MAT match by firm id (cfn, survu_id) and material code (mnaics) in 2007, at cmf_mat_match2007

*/


%include 'yyyy/pdata.sas';

options obs=max;
    

libname ecroot 'xxxx/data';
%macro link_prod(y);
* LOAD PRODUCTS DATASET;
%if &y.=1992 %then %do;
data prod&y.(keep = cfn curpc pv);
    set pdata.cmf&y.prod;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if curpc~= " ";
    if pv>0; 
run; 

* make sure to collapse to unique cfn and mnaics;
   
proc means data=prod&y. NWAY NOPRINT;
    class cfn curpc;
    var pv;
    output out=prod&y. sum=;
run;

data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 sic_cmf naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(sic=sic_cmf));
    if year=&y.;
run;
%end;

%if &y.=1997 %then %do;
data prod&y.(keep = cfn naicspc pv);
    set pdata.cmf&y.prod;
    if pv>0; 
run; 
    
* make sure to collapse to unique cfn and naicspc;
   
proc means data=prod&y. NWAY NOPRINT;
    class cfn naicspc;
    var pv;
    output out=prod&y. sum=;
run;
    
data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics97_census=naics_cmf));
    if year=&y.;
run;
%end;
    
%if &y. = 2002 %then %do;    
data prod&y.(keep = survu_id naicspc pv rename=(survu_id=cfn));
    set pdata.cmf&y.prod;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if pv>0; 
run; 

* make sure to collapse to unique cfn and naicspc;
   
proc means data=prod&y. NWAY NOPRINT;
    class cfn naicspc;
    var pv;
    output out=prod&y. sum=;
run;

* Pull good plants from estabs2007_clean;
* Only plants with matcfn=1 are included;


data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics02_census=naics_cmf));
    if year=&y.;
run;
%end;
    

%if &y. = 2007 %then %do;    
data prod&y.(keep = survu_id naicspc pv rename=(survu_id=cfn));
    set pdata.cmf&y.prod;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if usable_prod="U" | usable_prod="C";
    if pv>0; 
run; 

* make sure to collapse to unique cfn and mnaics;
   
proc means data=prod&y. NWAY NOPRINT;
    class cfn naicspc;
    var pv;
    output out=prod&y. sum=;
run;

* Pull good plants from estabs2007_clean;



data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics07_census=naics_cmf));
    if year=&y.;
run;
%end;
    
%if &y. = 2012 %then %do;    
data prod&y.(keep = survu_id naicspc pv rename=(survu_id=cfn));
    set pdata.cmf&y.prod;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if usable_prod="U" | usable_prod="C";
    if pv>0; 
run; 

* make sure to collapse to unique cfn and naicspc;
    
proc means data=prod&y. NWAY NOPRINT;
    class cfn naicspc;
    var pv;
    output out=prod&y. sum=;
run;

* Pull good plants from estabs2007_clean;

data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics12_census=naics_cmf));
    if year=&y.;
run;
%end;
    
* merge mat data set with info from cmf;
data ecroot.cmf_prod_match&y.;
    merge prod&y.(in=a) goodplants&y.(in=b);
    by cfn;
    if a=1 & b=1;
run;
%mend;

%macro loop1();
  %do y=1992 %to 2012 %by 5;
     %link_prod(&y.);
  %end;
%mend;
%loop1();
